Gene Ontology Analysis analysis of LCM RNA Data

Managing Packages Using Renv

To run this code in my project using the renv environment, run the following lines of code

install.packages("renv") #install the package on the new computer (may not be necessary if renv bootstraps itself as expected)
renv::restore() #reinstall all the package versions in the renv lockfile

Load packages

library("ViSEAGO")
## 
## Warning: replacing previous import 'data.table::set' by 'dendextend::set' when
## loading 'ViSEAGO'
## Warning: replacing previous import 'dendextend::cutree' by 'stats::cutree' when
## loading 'ViSEAGO'
## Warning: replacing previous import 'AnnotationDbi::tail' by 'utils::tail' when
## loading 'ViSEAGO'
require("topGO")
## Loading required package: topGO
## Loading required package: BiocGenerics
## Loading required package: generics
## 
## Attaching package: 'generics'
## The following objects are masked from 'package:base':
## 
##     as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
##     setequal, union
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique,
##     unsplit, which.max, which.min
## Loading required package: graph
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: SparseM
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
require("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ ggplot2::annotate()   masks ViSEAGO::annotate()
## ✖ stringr::boundary()   masks graph::boundary()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks S4Vectors::first()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select()       masks AnnotationDbi::select()
## ✖ dplyr::slice()        masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#BiocManager::install("GO.db")

sessionInfo() #provides list of loaded packages and version of R.
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.0.1
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices datasets  utils     methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.9.4      forcats_1.0.1        stringr_1.6.0       
##  [4] dplyr_1.1.4          purrr_1.2.0          readr_2.1.5         
##  [7] tidyr_1.3.1          tibble_3.3.0         ggplot2_4.0.0       
## [10] tidyverse_2.0.0      topGO_2.62.0         SparseM_1.84-2      
## [13] GO.db_3.22.0         AnnotationDbi_1.72.0 IRanges_2.44.0      
## [16] S4Vectors_0.48.0     Biobase_2.70.0       graph_1.88.0        
## [19] BiocGenerics_0.56.0  generics_0.1.4       ViSEAGO_1.24.0      
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3     rstudioapi_0.17.1      jsonlite_2.0.0        
##   [4] shape_1.4.6.1          magrittr_2.0.4         farver_2.1.2          
##   [7] rmarkdown_2.30         GlobalOptions_0.1.2    fs_1.6.6              
##  [10] vctrs_0.6.5            memoise_2.0.1          RCurl_1.98-1.17       
##  [13] webshot_0.5.5          htmltools_0.5.8.1      progress_1.2.3        
##  [16] dynamicTreeCut_1.63-1  curl_7.0.0             sass_0.4.10           
##  [19] bslib_0.9.0            htmlwidgets_1.6.4      plyr_1.8.9            
##  [22] httr2_1.2.1            plotly_4.11.0          cachem_1.1.0          
##  [25] igraph_2.2.1           lifecycle_1.0.4        iterators_1.0.14      
##  [28] pkgconfig_2.0.3        Matrix_1.7-4           R6_2.6.1              
##  [31] fastmap_1.2.0          clue_0.3-66            digest_0.6.37         
##  [34] colorspace_2.1-2       RSQLite_2.4.3          seriation_1.5.8       
##  [37] filelock_1.0.3         timechange_0.3.0       httr_1.4.7            
##  [40] compiler_4.5.2         bit64_4.6.0-1          withr_3.0.2           
##  [43] doParallel_1.0.17      S7_0.2.0               BiocParallel_1.44.0   
##  [46] viridis_0.6.5          DBI_1.2.3              UpSetR_1.4.0          
##  [49] heatmaply_1.6.0        dendextend_1.19.1      R.utils_2.13.0        
##  [52] biomaRt_2.66.0         rappdirs_0.3.3         rjson_0.2.23          
##  [55] tools_4.5.2            R.oo_1.27.1            glue_1.8.0            
##  [58] DiagrammeR_1.0.11      GOSemSim_2.36.0        grid_4.5.2            
##  [61] cluster_2.1.8.1        fgsea_1.36.0           gtable_0.3.6          
##  [64] tzdb_0.5.0             R.methodsS3_1.8.2      ca_0.71.1             
##  [67] data.table_1.17.8      hms_1.1.4              XVector_0.50.0        
##  [70] foreach_1.5.2          pillar_1.11.1          yulab.utils_0.2.1     
##  [73] circlize_0.4.16        BiocFileCache_3.0.0    lattice_0.22-7        
##  [76] renv_1.1.5             bit_4.6.0              tidyselect_1.2.1      
##  [79] registry_0.5-1         ComplexHeatmap_2.26.0  Biostrings_2.78.0     
##  [82] knitr_1.50             gridExtra_2.3          Seqinfo_1.0.0         
##  [85] xfun_0.54              matrixStats_1.5.0      DT_0.34.0             
##  [88] visNetwork_2.1.4       stringi_1.8.7          lazyeval_0.2.2        
##  [91] yaml_2.3.10            evaluate_1.0.5         codetools_0.2-20      
##  [94] BiocManager_1.30.26    cli_3.6.5              jquerylib_0.1.4       
##  [97] dichromat_2.0-0.1      Rcpp_1.1.0             dbplyr_2.5.1          
## [100] png_0.1-8              XML_3.99-0.19          parallel_4.5.2        
## [103] assertthat_0.2.1       blob_1.2.4             prettyunits_1.2.0     
## [106] AnnotationForge_1.52.0 bitops_1.0-9           viridisLite_0.4.2     
## [109] scales_1.4.0           crayon_1.5.3           GetoptLong_1.0.5      
## [112] rlang_1.1.6            cowplot_1.2.0          fastmatch_1.1-6       
## [115] KEGGREST_1.50.0        TSP_1.2-5

Description of pipeline

I am going to perform functional enrichment of GO terms using ViSEAGO.

I am following this vignette: http://bioconductor.unipi.it/packages/devel/bioc/vignettes/ViSEAGO/inst/doc/ViSEAGO.html.

Load in reference files and differential expression data

In the next chunk I am loading in my DESeq data. These results are ordered by adjusted p-value. As a reminder, negative LFC = higher in Oral tissue, and positive LFC = higher in Aboral tissue.

#load in DESeq results
DESeq <- read.csv("../output_RNA/differential_expression/DESeq_results.csv", header = TRUE) %>% dplyr::rename("query" ="X")

#make dataframes of just differentially expressed genes for each LFC direction - filtering a little more stringent, abs(LFC) >2
DE_05_Aboral <- DESeq %>% filter(padj < 0.05 & log2FoldChange > 1)
DE_05_OralEpi <- DESeq %>% filter(padj < 0.05& log2FoldChange < -1)

#load in annotation data 
annot_tab <- read.delim("../references/annotation/protein-GO.tsv") %>% dplyr::rename(GOs = GeneOntologyIDs)

#filter annotation data for just expressed genes with GO annotations
annot_tab <- annot_tab %>% filter(query %in% DESeq$query) 

annot_tab$GOs <- gsub("; ", ";", annot_tab$GOs)
annot_tab$GOs[annot_tab$GOs==""] <- NA
annot_tab <- annot_tab %>% filter(!is.na(GOs))

nrow(annot_tab)
## [1] 10638
nrow(annot_tab)/nrow(DESeq)
## [1] 0.7354812

10638/14464 genes in our dataset have GO information in this file. That is 74%.

sum(annot_tab$query %in% DE_05_Aboral$query)
## [1] 379
sum(annot_tab$query %in% DE_05_Aboral$query)/nrow(DE_05_Aboral)
## [1] 0.6792115
sum(annot_tab$query %in% DE_05_OralEpi$query)
## [1] 796
sum(annot_tab$query %in% DE_05_OralEpi$query)/nrow(DE_05_OralEpi)
## [1] 0.6546053

379/558 genes that are significantly upregulated in the Aboral tissue have annotation information. That is 68% of the genes.

796/1216 genes that are significantly upregulated in the Oral Epidermis tissue have annotation information. That is 71% of the genes.

Create custom GO annotation file for ViSEAGO

##Get a list of GO Terms for all genes
annots <- annot_tab %>% dplyr::select(query,GOs,ProteinNames) %>% dplyr::rename("GO.terms" = GOs)

# format into the format required by ViSEAGO for custom mappings
Custom_GOs <- annots %>%
  # Separate GO terms into individual rows
  separate_rows(GO.terms, sep = ";") %>%
  # Add necessary columns
  mutate(
    taxid = "pacuta",
    gene_symbol = ProteinNames,
    evidence = "SwissProt"
  ) %>%
  # Rename columns
  dplyr::rename(
    gene_id = query,
    GOID = GO.terms
  ) %>%
  dplyr::select(taxid, gene_id, gene_symbol, GOID, evidence)

Custom_GOs_valid <- Custom_GOs %>% filter(GOID %in% keys(GO.db))

write.table(Custom_GOs_valid, "../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt",row.names = FALSE, sep = "\t", quote = FALSE,col.names=TRUE)

length(unique(Custom_GOs$gene_id))
## [1] 10638
length(unique(Custom_GOs_valid$gene_id))
## [1] 10637

We seem to have lost one gene when filtering for valid GO terms, so I need to account for that below.

load the file into ViSEAGO

Custom_Pacuta <- ViSEAGO::Custom2GO("../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt")
## 'select()' returned 1:1 mapping between keys and columns
myGENE2GO_Pacuta <- ViSEAGO::annotate(
    id="pacuta",
    Custom_Pacuta
)

Create gene lists for enrichment

selection <- DESeq %>% filter(query %in% Custom_GOs_valid$gene_id) %>% 
                       mutate(DE_05_Aboral = ifelse(query %in% DE_05_Aboral$query, 1,0)) %>%
                       mutate(DE_05_Oral = ifelse(query %in% DE_05_OralEpi$query, 1,0)) %>%
                       mutate(expressed = 1)

selection_Aboral <- selection %>% pull(DE_05_Aboral) %>% as.factor()
names(selection_Aboral) <- selection %>% pull(query)

selection_Oral <- selection %>% pull(DE_05_Oral) %>% as.factor()
names(selection_Oral) <- selection %>% pull(query)

expressed <- selection %>% pull(expressed) %>% as.factor()
names(expressed) <- selection %>% pull(query)

Oral Epidermis:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)

BP_Oral <- ViSEAGO::create_topGOdata(
    geneSel=selection,
    allGenes=background,
    gene2GO=myGENE2GO_Pacuta, 
    ont="BP",
    nodeSize=5
)
## 
## Building most specific GOs .....
##  ( 8797 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 12496 GO terms and 27095 relations. )
## 
## Annotating nodes ...............
##  ( 9496 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Oral <- topGO::runTest(
    BP_Oral,
    algorithm ="elim",
    statistic = "fisher"
)
## 
##           -- Elim Algorithm -- 
## 
##       the algorithm is scoring 4392 nontrivial nodes
##       parameters: 
##           test statistic: fisher
##           cutOff: 0.01
## 
##   Level 19:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 18:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  18 nodes to be scored   (0 eliminated genes)
## 
##   Level 15:  39 nodes to be scored   (0 eliminated genes)
## 
##   Level 14:  42 nodes to be scored   (61 eliminated genes)
## 
##   Level 13:  70 nodes to be scored   (68 eliminated genes)
## 
##   Level 12:  156 nodes to be scored  (96 eliminated genes)
## 
##   Level 11:  305 nodes to be scored  (115 eliminated genes)
## 
##   Level 10:  460 nodes to be scored  (323 eliminated genes)
## 
##   Level 9:   577 nodes to be scored  (545 eliminated genes)
## 
##   Level 8:   635 nodes to be scored  (630 eliminated genes)
## 
##   Level 7:   676 nodes to be scored  (732 eliminated genes)
## 
##   Level 6:   625 nodes to be scored  (1158 eliminated genes)
## 
##   Level 5:   439 nodes to be scored  (1241 eliminated genes)
## 
##   Level 4:   242 nodes to be scored  (1270 eliminated genes)
## 
##   Level 3:   79 nodes to be scored   (1359 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (1359 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (1359 eliminated genes)
BP_Results <- ViSEAGO::merge_enrich_terms(
    Input = list( Oral_elim = c("BP_Oral", "elim_Oral")))
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
##  Oral_elim
##       BP_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 796
##         feasible_genes: 9496
##         feasible_genes_significant: 687
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 60
##         feasible_genes: 9496
##         feasible_genes_significant: 687
##         genes_nodeSize: 5
##         Nontrivial_nodes: 4392 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
## - enrich GOs (in at least one list): 60 GO terms of 1 conditions.
##         Oral_elim : 60 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
    BP_Results,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral.csv"
)
  • enrichment pvalue cutoff: Oral_elim : 0.01
  • enrich GOs (in at least one list): Oral_elim : 60 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
##       BP_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 796
##         feasible_genes: 9496
##         feasible_genes_significant: 687
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 60
##         feasible_genes: 9496
##         feasible_genes_significant: 687
##         genes_nodeSize: 5
##         Nontrivial_nodes: 4392 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
## - enrich GOs (in at least one list): 60 GO terms of 1 conditions.
##         Oral_elim : 60 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Oral <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 2
      )
    )
  ),
  samples.tree = NULL
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the dendextend package.
##   Please report the issue at <https://github.com/talgalili/dendextend/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Oral,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Oral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2_Oral,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_cluster_heatmap_Wang_wardD2.csv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Oral,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Oral<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2_Oral,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Oral,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Oral <-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2_Oral,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Oral,
    "GOclusters")

Aboral Epidermis:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)

BP_Aboral <- ViSEAGO::create_topGOdata(
    geneSel=selection,
    allGenes=background,
    gene2GO=myGENE2GO_Pacuta, 
    ont="BP",
    nodeSize=5
)
## 
## Building most specific GOs .....
##  ( 8797 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 12496 GO terms and 27095 relations. )
## 
## Annotating nodes ...............
##  ( 9496 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Aboral <- topGO::runTest(
    BP_Aboral,
    algorithm ="elim",
    statistic = "fisher"
)
## 
##           -- Elim Algorithm -- 
## 
##       the algorithm is scoring 3092 nontrivial nodes
##       parameters: 
##           test statistic: fisher
##           cutOff: 0.01
## 
##   Level 17:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  6 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  21 nodes to be scored   (0 eliminated genes)
## 
##   Level 14:  24 nodes to be scored   (0 eliminated genes)
## 
##   Level 13:  44 nodes to be scored   (175 eliminated genes)
## 
##   Level 12:  85 nodes to be scored   (236 eliminated genes)
## 
##   Level 11:  184 nodes to be scored  (238 eliminated genes)
## 
##   Level 10:  284 nodes to be scored  (291 eliminated genes)
## 
##   Level 9:   377 nodes to be scored  (334 eliminated genes)
## 
##   Level 8:   450 nodes to be scored  (585 eliminated genes)
## 
##   Level 7:   472 nodes to be scored  (824 eliminated genes)
## 
##   Level 6:   489 nodes to be scored  (1100 eliminated genes)
## 
##   Level 5:   366 nodes to be scored  (1905 eliminated genes)
## 
##   Level 4:   194 nodes to be scored  (2346 eliminated genes)
## 
##   Level 3:   78 nodes to be scored   (2420 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (2578 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (2578 eliminated genes)
BP_Results <- ViSEAGO::merge_enrich_terms(
    Input = list(Aboral_elim = c("BP_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
##  Aboral_elim
##       BP_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 379
##         feasible_genes: 9496
##         feasible_genes_significant: 335
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 118
##         feasible_genes: 9496
##         feasible_genes_significant: 335
##         genes_nodeSize: 5
##         Nontrivial_nodes: 3092 
##  - enrichment pvalue cutoff:
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 118 GO terms of 1 conditions.
##         Aboral_elim : 118 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
    BP_Results,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral.csv"
)
  • enrichment pvalue cutoff: Aboral_elim : 0.01
  • enrich GOs (in at least one list): Aboral_elim : 118 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Aboral_elim
##       BP_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 379
##         feasible_genes: 9496
##         feasible_genes_significant: 335
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 118
##         feasible_genes: 9496
##         feasible_genes_significant: 335
##         genes_nodeSize: 5
##         Nontrivial_nodes: 3092 
##  - enrichment pvalue cutoff:
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 118 GO terms of 1 conditions.
##         Aboral_elim : 118 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Aboral <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 2
      )
    )
  ),
  samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Aboral,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Aboral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2_Aboral,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_cluster_heatmap_Wang_wardD2.csv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Aboral,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Aboral<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2_Aboral,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2_Aboral,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Aboral<-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2_Aboral,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2_Aboral,
    "GOclusters")

Plotting of both tissues, pre-clustered by tissue

Custom plotting of results

clustered_Oral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Oral@enrich_GOs@data) 

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Oral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
    Cluster.term = str_replace(cluster_full,"^\\d+_",""),
    Cluster.term = str_replace(Cluster.term,"_.*$","")
  ) #%>%
  #mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) #%>%
  #dplyr::select(cluster, Cluster.name)

clustered_Oral_DEGs_enrichedGO <- clustered_Oral_DEGs_enrichedGO %>%
  left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
  mutate(Tissue="Oral")

clustered_Aboral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Aboral@enrich_GOs@data)

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Aboral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
    Cluster.term = str_replace(cluster_full,"^\\d+_",""),
    Cluster.term = str_replace(Cluster.term,"_.*$","")
  ) #%>%
  #mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) #%>%
  #dplyr::select(cluster, Cluster.name)
clustered_Aboral_DEGs_enrichedGO <- clustered_Aboral_DEGs_enrichedGO %>%
  left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
  mutate(Tissue="Aboral")


clustered_allDEGs_enrichedGO <- rbind(clustered_Oral_DEGs_enrichedGO %>%
                                        dplyr::select(-contains("Oral")),
                                      clustered_Aboral_DEGs_enrichedGO %>%
                                        dplyr::select(-contains("Aboral")))

Plot

library(dplyr)
library(tidyr)
library(ggplot2)

# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
  dplyr::select(Cluster.name, Tissue) %>%
  group_by(Cluster.name) %>%
  summarise(
    Oral_terms = sum(Tissue=="Oral"),
    Aboral_terms = sum(Tissue=="Aboral"),
    .groups = "drop"
  ) %>%
  mutate(
    deviation = Aboral_terms - Oral_terms,
    Oral_terms = -Oral_terms  # Make negative for bidirectional plot
  ) %>%
  pivot_longer(cols = c("Oral_terms", "Aboral_terms"), 
               names_to = "Tissue", values_to = "n_terms") %>%
  mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))

# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
  distinct(Cluster.name, deviation) %>%
  arrange(deviation)

oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional_clustered_separate.png", p, width = 10, height = 8, dpi = 300)

Both tissues together:

create topGO objects and perform enrichment using topGO wrapped by ViSEAGO

BP_Results <- ViSEAGO::merge_enrich_terms(
    Input = list(Oral_elim = c("BP_Oral", "elim_Oral"),
                 Aboral_elim = c("BP_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns

Visualize and save initial results

# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
##  Oral_elim
##       BP_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 796
##         feasible_genes: 9496
##         feasible_genes_significant: 687
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 60
##         feasible_genes: 9496
##         feasible_genes_significant: 687
##         genes_nodeSize: 5
##         Nontrivial_nodes: 4392 
##  Aboral_elim
##       BP_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 379
##         feasible_genes: 9496
##         feasible_genes_significant: 335
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 118
##         feasible_genes: 9496
##         feasible_genes_significant: 335
##         genes_nodeSize: 5
##         Nontrivial_nodes: 3092 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 178 GO terms of 2 conditions.
##         Oral_elim : 60 terms
##         Aboral_elim : 118 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
    BP_Results,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05.csv"
)
  • enrichment pvalue cutoff: Oral_elim : 0.01 Aboral_elim : 0.01
  • enrich GOs (in at least one list): 178 GO terms of 2 conditions. Oral_elim : 60 terms Aboral_elim : 118 terms

Semantic similarity –> Cluster: Use Wang graph-based method to identify similarity between GO terms

# initialize 
myGOs<-ViSEAGO::build_GO_SS(
    gene2GO=myGENE2GO_Pacuta,
    enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
    myGOs,
   distance=c("Wang")
)

myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
##       BP_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 796
##         feasible_genes: 9496
##         feasible_genes_significant: 687
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Oral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 60
##         feasible_genes: 9496
##         feasible_genes_significant: 687
##         genes_nodeSize: 5
##         Nontrivial_nodes: 4392 
##  Aboral_elim
##       BP_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
##         available_genes: 10637
##         available_genes_significant: 379
##         feasible_genes: 9496
##         feasible_genes_significant: 335
##         genes_nodeSize: 5
##         nodes_number: 6675
##         edges_number: 14047
##       elim_Aboral 
##         description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt 
##         test_name: fisher p<0.01
##         algorithm_name: elim
##         GO_scored: 6675
##         GO_significant: 118
##         feasible_genes: 9496
##         feasible_genes_significant: 335
##         genes_nodeSize: 5
##         Nontrivial_nodes: 3092 
##  - enrichment pvalue cutoff:
##         Oral_elim : 0.01
##         Aboral_elim : 0.01
## - enrich GOs (in at least one list): 178 GO terms of 2 conditions.
##         Oral_elim : 60 terms
##         Aboral_elim : 118 terms
## - terms distances:  Wang

Multi Dimensional Scaling Plot of the GO-terms based on semantic similarity

# display MDSplot
ViSEAGO::MDSplot(myGOs,
                 "GOterms")

Heatmap + table of GO terms, with clusters aggregated by hclust with ward.D2 method

# GOterms heatmap with the default parameters
Wang_clusters_wardD2 <- ViSEAGO::GOterms_heatmap(
  myGOs,
  showIC = TRUE,
  showGOlabels = TRUE,
  GO.tree = list(
    tree = list(distance = "Wang", aggreg.method = "ward.D2"),
    cut = list(
      dynamic = list(
        pamStage = TRUE,
        pamRespectsDendro = TRUE,
        deepSplit = 2,
        minClusterSize = 4
      )
    )
  ),
  samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
    Wang_clusters_wardD2,
    "../output_RNA/differential_expression/semantic-enrichment/DE_05_cluster_heatmap_Wang_wardD2.csv"
)

Multi Dimensional Scaling Plot of the enriched GO-terms based on semantic similarity, colored by cluster

# display colored MDSplot
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOterms")

Further summarizing the Semantic Similarity-based clusters

# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
    Wang_clusters_wardD2,
    distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
    Wang_clusters_wardD2,
    "GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
    Wang_clusters_wardD2,
    tree=list(
        distance="BMA",
        aggreg.method="ward.D2"
    )
)
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
    Wang_clusters_wardD2,
    "GOclusters")

Custom plotting of results

clustered_allDEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2@enrich_GOs@data)

cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>% 
  mutate(
    cluster = str_extract(cluster_full, "\\d+"),
    Cluster.name = str_replace(cluster_full,".*?_.*?_","")
  ) %>%
  mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
  select(cluster, Cluster.name)

cluster_map
##    cluster                                              Cluster.name
## 1        1                             Cluster 1: neuron development
## 2        2                          Cluster 2: developmental process
## 3        3               Cluster 3: anatomical structure development
## 4        4               Cluster 4: anatomical structure development
## 5        5             Cluster 5: multicellular organism development
## 6        6                      Cluster 6: sensory organ development
## 7        7                             Cluster 7: system development
## 8        8                       Cluster 8: animal organ development
## 9        9                                      Cluster 9: transport
## 10      10                                     Cluster 10: transport
## 11      11                                 Cluster 11: cell adhesion
## 12      12                   Cluster 12: regulation of cell adhesion
## 13      13 Cluster 13: external encapsulating structure organization
## 14      14                        Cluster 14: cell junction assembly
## 15      15                            Cluster 15: biological_process
## 16      16                             Cluster 16: secretion by cell
## 17      17                           Cluster 17: cell-cell signaling
## 18      18                                     Cluster 18: signaling
## 19      19                          Cluster 19: response to stimulus
## 20      20                       Cluster 20: immune effector process
## 21      21                            Cluster 21: biological_process
## 22      22                 Cluster 22: response to external stimulus
## 23      23              Cluster 23: multicellular organismal process
## 24      24                        Cluster 24: nervous system process
## 25      25                             Cluster 25: metabolic process
## 26      26                             Cluster 26: metabolic process

Manually fix cluster names based on GO terms inside each cluster

cluster_map_fixed <- cluster_map #%>%
  # mutate(Cluster.name = str_replace(Cluster.name, "Cluster 2: developmental process", "Cluster 2: cell fate determination"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 4: anatomical structure development", "Cluster 4: regeneration & tissue morphogenesis"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 5: multicellular organism development", "Cluster 5: multicellular organism pattern formation"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 9: transport", "Cluster 9: metal ion transmembrane transport"),
  #        Cluster.name = str_replace(Cluster.name, "Cluster 10: transport", "Cluster 10: amino acid & small molecule transmembrane transport"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 12: nervous system process", "Cluster 12: sensory perception"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 16: biological_process", "Cluster 16: regeneration & wound healing"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 18: secretion by cell", "Cluster 18: neurotransmitter and hormone secretion"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 24: external encapsulating structure organization", "Cluster 24: extracellular matrix organization"),
  #       Cluster.name = str_replace(Cluster.name, "Cluster 25: biological_process", "Cluster 25: cell organization and homeostasis")
  #        )
clustered_allDEGs_enrichedGO <- clustered_allDEGs_enrichedGO %>%
  left_join(cluster_map_fixed, by = c("GO.cluster" = "cluster"))

write.csv(clustered_allDEGs_enrichedGO,"../output_RNA/differential_expression/semantic-enrichment/DE_05_clusters_named.csv")

library(DT)

# Interactive table with scrolling
datatable(clustered_allDEGs_enrichedGO,
          options = list(
            pageLength = 10,   # rows per page
            scrollX = TRUE,    # horizontal scroll if wide
            scrollY = "400px"  # vertical scroll
          ))

Plot

library(dplyr)
library(tidyr)
library(ggplot2)

# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
  select(Cluster.name, Oral_elim.Significant_genes, Aboral_elim.Significant_genes) %>%
  mutate(
    # Clean cluster names
    #Cluster.name = gsub("^Cluster \\d+:\\s*", "", Cluster.name),
    Oral_terms = ifelse(!is.na(Oral_elim.Significant_genes), 1, 0),
    Aboral_terms = ifelse(!is.na(Aboral_elim.Significant_genes), 1, 0)
  ) %>%
  group_by(Cluster.name) %>%
  summarise(
    Oral_terms = sum(Oral_terms),
    Aboral_terms = sum(Aboral_terms),
    .groups = "drop"
  ) %>%
  filter(Oral_terms + Aboral_terms > 0) %>%
  mutate(
    deviation = Aboral_terms - Oral_terms,
    Oral_terms = -Oral_terms  # Make negative for bidirectional plot
  ) %>%
  pivot_longer(cols = c("Oral_terms", "Aboral_terms"), 
               names_to = "Tissue", values_to = "n_terms") %>%
  mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))

datatable(cluster_data,
          options = list(
            pageLength = 10,   # rows per page
            scrollX = TRUE,    # horizontal scroll if wide
            scrollY = "400px"  # vertical scroll
          ))
# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
  distinct(Cluster.name, deviation) %>%
  arrange(deviation)

oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional.png", p, width = 10, height = 8, dpi = 300)

print(p)

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) +  #green: "#2E8B57"
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12, hjust=0)
  ) +
  scale_y_continuous(labels = abs) +
  scale_x_discrete(position = "top") +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional_rightaxis.png", p, width = 10, height = 8, dpi = 300)

print(p)

# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
  geom_col(width = 0.7, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
  geom_vline(xintercept = c(oral_to_equal, equal_to_aboral), 
             color = "grey60", linetype = "dashed", linewidth = 0.6) +
  scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
  #coord_flip() +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 12,angle = 65, hjust = 1),
    axis.text.y = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  labs(
    x = "Gene clusters",
    y = "Number of enriched GO terms",
    title = "Tissue-specific GO term enrichment patterns",
    subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
  )

p

ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional_horiz.png", p, width = 12, height = 8, dpi = 300)